Read in, format, and visualize data.

# Read in target data

target_data <- readr::read_csv("https://data.ecoforecast.org/targets/terrestrial/terrestrial_30min-targets.csv.gz", guess_max = 1e6)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   time = col_datetime(format = ""),
##   siteID = col_character(),
##   nee = col_double(),
##   le = col_double(),
##   nee_sd_intercept = col_double(),
##   nee_sd_slopeP = col_double(),
##   nee_sd_slopeN = col_double(),
##   le_sd_intercept = col_double(),
##   le_sd_slopeP = col_double(),
##   le_sd_slopeN = col_double(),
##   vswc = col_double(),
##   vswc_sd = col_double()
## )
# Read in gap-filled temperature data

temp_inSW_data <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_inSW_gapfill.csv"))

# Convert time column from character to POSIXct

# Time zone in target data is UTC according to https://docs.google.com/document/d/1l7sxBk-z-GHTlk50rdxP0lPTwJzFJ2gykclINkMsWcc/edit

target_data$time <- ymd_hms(target_data$time, tz = "UTC")
temp_inSW_data$startDateTime <- ymd_hms(temp_inSW_data$startDateTime, tz = "UTC")

# Convert siteID column from character to factor

target_data$siteID <- as.factor(target_data$siteID)
temp_inSW_data$siteID <- as.factor(temp_inSW_data$siteID)

# Plot all data

ggplot(data = target_data) +
  geom_point(aes(x = time, y = nee, color = siteID)) +
  facet_grid(siteID ~ .) + 
  labs(title = "Full Time Period: 01 Feb 2017 - 31 Mar 2021",
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw() +
  theme(legend.position = "none")

# Plot data before forecast time period

ggplot(data = filter(target_data,
                     time >= "2021-03-01",
                     time < "2021-04-01")) +
  geom_point(aes(x = time, y = nee, color = siteID)) +
  facet_grid(siteID ~ .) + 
  labs(title = "Example Time Period: 01 Mar 2021 - 31 Mar 2021",
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw() +
  theme(legend.position = "none")

Construct dynamic linear model (DLM) for NIMBLE.

We used the following model structure:

\[NEE(t) = \beta_1 \times NEE(t-1) + \beta_2 \times \Delta SW_{incoming} + \beta_3 \times \Delta temp + \beta_4\]

TempDLM <- nimbleCode({
  
  # Note:
  # x = real NEE (state variable)
  # y = observed NEE
  
  # Priors
  
  x[1] ~ dnorm(x_ic, sd = sd_ic)
  sd_add ~ dunif(0, 100)
  b1 ~ dunif(0, 1)  # "Uncertainty stabilization" parameter
  b2 ~ dunif(-10, 0)  # Incoming SW radiation effect coefficient
  b3 ~ dunif(0, 10)  # Temperature effect coefficient
  b4 ~ dunif(-50, 50)  # Intercept

  # Process model
  
  for(t in 2:n){
    pred[t] <- b1 * x[t - 1] + b2 * (inSW[t] - inSW[t - 1]) + b3 * (temp[t] - temp[t - 1]) + b4
    x[t] ~ dnorm(pred[t], sd = sd_add)
  }
  
  # Data model
  
  for(t in 1:n){
    y[t] ~ dnorm(x[t], sd = sd_obs[t])
  }
  
})

Because of the size of the data set, a subset of the data had to be used for model fitting. For forecasting, the data subset consisted of data from the time period immediately before the forecast period. The variables controlling the size of the data subset are specified below.

# Note: B/c of size of data set, need to use subset of data

subset_length_days <- 21  # Length of subset [d]

subset_length_timesteps <- subset_length_days * 24 * 2  # Number of time steps in subset (days x hours/day x half hours/hour)

Specify data from BART to be used for model fitting.

# Separate data by site

target_data_BART <- filter(target_data, siteID == "BART")
temp_data_BART <- filter(temp_inSW_data, siteID == "BART")

# Scale data

sd_temp_data_BART <- sd(temp_data_BART$temp)
sd_inSW_data_BART <- sd(temp_data_BART$inSW)

temp_data_BART$temp <- temp_data_BART$temp/sd_temp_data_BART
temp_data_BART$inSW <- temp_data_BART$inSW/sd_inSW_data_BART

# Specify time period end date and time

end_date_time_BART <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_BART <- match(end_date_time_BART, target_data_BART$time)

# Calculate time period start data and time index value

start_i_BART <- end_i_BART - subset_length_timesteps + 1

# Subset data

target_data_BART_sub <- target_data_BART[start_i_BART:end_i_BART,]

# Interpolate NEE values to estimate sd_obs

nee_interp_BART_sub <- approx(x = target_data_BART_sub$time[!is.na(target_data_BART_sub$nee)],
                              y = target_data_BART_sub$nee[!is.na(target_data_BART_sub$nee)],
                              xout = target_data_BART_sub$time,
                              method = "linear",
                              yleft = mean(target_data_BART_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_BART_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_BART_sub <- rep(NA, length(target_data_BART_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_BART_sub)){
  
  if(nee_interp_BART_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeP[1] * nee_interp_BART_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_BART_sub[i] <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$ nee_sd_slopeN[1] * nee_interp_BART_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

BART_temp_i <- match(target_data_BART_sub$time, temp_data_BART$startDateTime)
temp_data_BART_sub <- temp_data_BART[BART_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_BART <- 0.75

# Specify data

data_TempDLM_BART <- list(y = target_data_BART_sub$nee,
                          inSW = temp_data_BART_sub$inSW,
                          temp = temp_data_BART_sub$temp,
                          sd_obs = sd_obs_BART_sub * sd_obs_mod_BART)

Specify data from KONZ to be used for model fitting.

# Separate data by site

target_data_KONZ <- filter(target_data, siteID == "KONZ")
temp_data_KONZ <- filter(temp_inSW_data, siteID == "KONZ")

# Scale data

sd_temp_data_KONZ <- sd(temp_data_KONZ$temp)
sd_inSW_data_KONZ <- sd(temp_data_KONZ$inSW)

temp_data_KONZ$temp <- temp_data_KONZ$temp/sd_temp_data_KONZ
temp_data_KONZ$inSW <- temp_data_KONZ$inSW/sd_inSW_data_KONZ

# Specify time period end date and time

end_date_time_KONZ <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_KONZ <- match(end_date_time_KONZ, target_data_KONZ$time)

# Calculate time period start data and time index value

start_i_KONZ <- end_i_KONZ - subset_length_timesteps + 1

# Subset data

target_data_KONZ_sub <- target_data_KONZ[start_i_KONZ:end_i_KONZ,]

# Interpolate NEE values to estimate sd_obs

nee_interp_KONZ_sub <- approx(x = target_data_KONZ_sub$time[!is.na(target_data_KONZ_sub$nee)],
                              y = target_data_KONZ_sub$nee[!is.na(target_data_KONZ_sub$nee)],
                              xout = target_data_KONZ_sub$time,
                              method = "linear",
                              yleft = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_KONZ_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_KONZ_sub <- rep(NA, length(target_data_KONZ_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_KONZ_sub)){
  
  if(nee_interp_KONZ_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeP[1] * nee_interp_KONZ_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_KONZ_sub[i] <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$ nee_sd_slopeN[1] * nee_interp_KONZ_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

KONZ_temp_i <- match(target_data_KONZ_sub$time, temp_data_KONZ$startDateTime)
temp_data_KONZ_sub <- temp_data_KONZ[KONZ_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_KONZ <- 0.15

# Specify data

data_TempDLM_KONZ <- list(y = target_data_KONZ_sub$nee,
                          inSW = temp_data_KONZ_sub$inSW,
                          temp = temp_data_KONZ_sub$temp,
                          sd_obs = sd_obs_KONZ_sub * sd_obs_mod_KONZ)

Specify data from OSBS to be used for model fitting.

# Separate data by site

target_data_OSBS <- filter(target_data, siteID == "OSBS")
temp_data_OSBS <- filter(temp_inSW_data, siteID == "OSBS")

# Scale data

sd_temp_data_OSBS <- sd(temp_data_OSBS$temp)
sd_inSW_data_OSBS <- sd(temp_data_OSBS$inSW)

temp_data_OSBS$temp <- temp_data_OSBS$temp/sd_temp_data_OSBS
temp_data_OSBS$inSW <- temp_data_OSBS$inSW/sd_inSW_data_OSBS

# Specify time period end date and time

end_date_time_OSBS <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_OSBS <- match(end_date_time_OSBS, target_data_OSBS$time)

# Calculate time period start data and time index value

start_i_OSBS <- end_i_OSBS - subset_length_timesteps + 1

# Subset data

target_data_OSBS_sub <- target_data_OSBS[start_i_OSBS:end_i_OSBS,]

# Interpolate NEE values to estimate sd_obs

nee_interp_OSBS_sub <- approx(x = target_data_OSBS_sub$time[!is.na(target_data_OSBS_sub$nee)],
                              y = target_data_OSBS_sub$nee[!is.na(target_data_OSBS_sub$nee)],
                              xout = target_data_OSBS_sub$time,
                              method = "linear",
                              yleft = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_OSBS_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_OSBS_sub <- rep(NA, length(target_data_OSBS_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_OSBS_sub)){
  
  if(nee_interp_OSBS_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeP[1] * nee_interp_OSBS_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_OSBS_sub[i] <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$ nee_sd_slopeN[1] * nee_interp_OSBS_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

OSBS_temp_i <- match(target_data_OSBS_sub$time, temp_data_OSBS$startDateTime)
temp_data_OSBS_sub <- temp_data_OSBS[OSBS_temp_i,]

# Specify sd_obs modifier (generally not needed for OSBS b/c of relatively high NEE in winter)

sd_obs_mod_OSBS <- 0.85

# Specify data

data_TempDLM_OSBS <- list(y = target_data_OSBS_sub$nee,
                          inSW = temp_data_OSBS_sub$inSW,
                          temp = temp_data_OSBS_sub$temp,
                          sd_obs = sd_obs_OSBS_sub * sd_obs_mod_OSBS)

Specify data from SRER to be used for model fitting.

# Separate data by site

target_data_SRER <- filter(target_data, siteID == "SRER")
temp_data_SRER <- filter(temp_inSW_data, siteID == "SRER")

# Scale data

sd_temp_data_SRER <- sd(temp_data_SRER$temp)
sd_inSW_data_SRER <- sd(temp_data_SRER$inSW)

temp_data_SRER$temp <- temp_data_SRER$temp/sd_temp_data_SRER
temp_data_SRER$inSW <- temp_data_SRER$inSW/sd_inSW_data_SRER

# Specify time period end date and time

end_date_time_SRER <- ymd_hms("2021-03-31 23:30:00", tz = "UTC")

# Match time period end date and time to index value

end_i_SRER <- match(end_date_time_SRER, target_data_SRER$time)

# Calculate time period start data and time index value

start_i_SRER <- end_i_SRER - subset_length_timesteps + 1

# Subset data

target_data_SRER_sub <- target_data_SRER[start_i_SRER:end_i_SRER,]

# Interpolate NEE values to estimate sd_obs

nee_interp_SRER_sub <- approx(x = target_data_SRER_sub$time[!is.na(target_data_SRER_sub$nee)],
                              y = target_data_SRER_sub$nee[!is.na(target_data_SRER_sub$nee)],
                              xout = target_data_SRER_sub$time,
                              method = "linear",
                              yleft = mean(target_data_SRER_sub$nee, na.rm = TRUE),
                              yright = mean(target_data_SRER_sub$nee, na.rm = TRUE),
                              na.rm = TRUE)

sd_obs_SRER_sub <- rep(NA, length(target_data_SRER_sub$nee))  # Initialize sd_obs vector

for(i in 1:length(sd_obs_SRER_sub)){
  
  if(nee_interp_SRER_sub$y[i] >= 0){  # Calculate sd_obs if NEE is positive
    
    sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeP[1] * nee_interp_SRER_sub$y[i]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
    
  }else{  # Calculate sd_obs if NEE is negative
    
    sd_obs_SRER_sub[i] <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$ nee_sd_slopeN[1] * nee_interp_SRER_sub$y[i]
    
  }
  
}

# Match temperature data to subset NEE data

SRER_temp_i <- match(target_data_SRER_sub$time, temp_data_SRER$startDateTime)
temp_data_SRER_sub <- temp_data_SRER[SRER_temp_i,]

# Specify sd_obs modifier (to account for lower NEE in winter)

sd_obs_mod_SRER <- 0.25

# Specify data

data_TempDLM_SRER <- list(y = target_data_SRER_sub$nee,
                          inSW = temp_data_SRER_sub$inSW,
                          temp = temp_data_SRER_sub$temp,
                          sd_obs = sd_obs_SRER_sub * sd_obs_mod_SRER)

Specify constants for NIMBLE.

# Specify constants

constants_BART <- list(n = length(target_data_BART_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_KONZ <- list(n = length(target_data_KONZ_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_OSBS <- list(n = length(target_data_OSBS_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

constants_SRER <- list(n = length(target_data_SRER_sub$nee),
                       x_ic = 0,
                       sd_ic = 1)

Specify initial conditions for temperature DLM.

# Specify number of chains

nchains = 3

# Initialize initial condition lists

inits_TempDLM_BART <- list()
inits_TempDLM_KONZ <- list()
inits_TempDLM_OSBS <- list()
inits_TempDLM_SRER <- list()

# Generate initial conditions

for(i in 1:nchains){
  
  # BART
  
  y.samp <- sample(nee_interp_BART_sub$y, length(nee_interp_BART_sub$y), replace = TRUE)
  inits_TempDLM_BART[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_BART_sub$y)
  
  # KONZ
  
  y.samp <- sample(nee_interp_KONZ_sub$y, length(nee_interp_KONZ_sub$y), replace = TRUE)
  inits_TempDLM_KONZ[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_KONZ_sub$y)
  
  # OSBS
  
  y.samp <- sample(nee_interp_OSBS_sub$y, length(nee_interp_OSBS_sub$y), replace = TRUE)
  inits_TempDLM_OSBS[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_OSBS_sub$y)
  
  # SRER
  
  y.samp <- sample(nee_interp_SRER_sub$y, length(nee_interp_SRER_sub$y), replace = TRUE)
  inits_TempDLM_SRER[[i]] <- list(sd_add = sd(diff(y.samp), na.rm = TRUE),
                                  x = nee_interp_SRER_sub$y)
  
}

Run NIMBLE for temperature DLM.

# Preliminaries

niter <- 11000
nthin <- 1

# BART

nimble_out_TempDLM_BART <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_BART,
                                      inits = inits_TempDLM_BART,
                                      constants = constants_BART,
                                      monitors = c("b1",
                                                   "b2",
                                                   "b3",
                                                   "b4",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, y, logProb_y, logProb_x.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# KONZ

nimble_out_TempDLM_KONZ <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_KONZ,
                                      inits = inits_TempDLM_KONZ,
                                      constants = constants_KONZ,
                                      monitors = c("b1",
                                                   "b2",
                                                   "b3",
                                                   "b4",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, y, logProb_y, logProb_x.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# OSBS

nimble_out_TempDLM_OSBS <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_OSBS,
                                      inits = inits_TempDLM_OSBS,
                                      constants = constants_OSBS,
                                      monitors = c("b1",
                                                   "b2",
                                                   "b3",
                                                   "b4",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, logProb_x, y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
# SRER

nimble_out_TempDLM_SRER <- nimbleMCMC(code = TempDLM,
                                      data = data_TempDLM_SRER,
                                      inits = inits_TempDLM_SRER,
                                      constants = constants_SRER,
                                      monitors = c("b1",
                                                   "b2",
                                                   "b3",
                                                   "b4",
                                                   "sd_add",
                                                   "x",
                                                   "y"),
                                      niter = niter,
                                      nchains = nchains,
                                      thin = nthin,
                                      samplesAsCodaMCMC = TRUE)
## NAs were detected in model variables: b1, logProb_b1, b2, logProb_b2, b3, logProb_b3, b4, logProb_b4, pred, logProb_x, y, logProb_y.
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|

Visualize chains and remove burn-in for temperature DLM.

# BART

# plot(nimble_out_TempDLM_BART[, c("b1")], main = "BART: b1")  # Visualize all chains
# plot(nimble_out_TempDLM_BART[, c("sd_add")], main = "BART: sd_add")  # Visualize all chains
# burnin_TempDLM_BART <- 2000  # Burn-in
# nimble_burn_TempDLM_BART <- window(nimble_out_TempDLM_BART, start = burnin_TempDLM_BART)  # Exclude burn-in
# plot(nimble_burn_TempDLM_BART[, c("b1")], main = "BART: b1 (burn-in removed)")  # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_BART[, c("sd_add")], main = "BART: sd_add (burn-in removed)")  # Visualize chains w/o burn-in
# 
# # KONZ
# 
# plot(nimble_out_TempDLM_KONZ[, c("b1")], main = "KONZ: b1")  # Visualize all chains
# plot(nimble_out_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add")  # Visualize all chains
# burnin_TempDLM_KONZ <- 2000  # Burn-in
# nimble_burn_TempDLM_KONZ <- window(nimble_out_TempDLM_KONZ, start = burnin_TempDLM_KONZ)  # Exclude burn-in
# plot(nimble_burn_TempDLM_KONZ[, c("b1")], main = "KONZ: b1 (burn-in removed)")  # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_KONZ[, c("sd_add")], main = "KONZ: sd_add (burn-in removed)")  # Visualize chains w/o burn-in
# 
# # OSBS
# 
# plot(nimble_out_TempDLM_OSBS[, c("b1")], main = "OSBS: b1")  # Visualize all chains
# plot(nimble_out_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add")  # Visualize all chains
# burnin_TempDLM_OSBS <- 2000  # Burn-in
# nimble_burn_TempDLM_OSBS <- window(nimble_out_TempDLM_OSBS, start = burnin_TempDLM_OSBS)  # Exclude burn-in
# plot(nimble_burn_TempDLM_OSBS[, c("b1")], main = "OSBS: b1 (burn-in removed)")  # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_OSBS[, c("sd_add")], main = "OSBS: sd_add (burn-in removed)")  # Visualize chains w/o burn-in
# 
# # SRER
# 
# plot(nimble_out_TempDLM_SRER[, c("b1")], main = "SRER: b1")  # Visualize all chains
# plot(nimble_out_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add")  # Visualize all chains
# burnin_TempDLM_SRER <- 2000  # Burn-in
# nimble_burn_TempDLM_SRER <- window(nimble_out_TempDLM_SRER, start = burnin_TempDLM_SRER)  # Exclude burn-in
# plot(nimble_burn_TempDLM_SRER[, c("b1")], main = "SRER: b1 (burn-in removed)")  # Visualize chains w/o burn-in
# plot(nimble_burn_TempDLM_SRER[, c("sd_add")], main = "SRER: sd_add (burn-in removed)")  # Visualize chains w/o burn-in


#Plot the NIMBLE chains:
PlotChain = function(siteID, nimbleOutput, burnin = 2000)
{
  #nimble_out_TempDLM_SRER = nimbleOutput
  #nimble_burn_TempDLM_SRER = nimbleMinusBurn
  
  #Visualize all chains:
  plot(nimbleOutput[, c("b1")], main = paste(siteID, ": b1", sep = ''))
  plot(nimbleOutput[, c("b2")], main = paste(siteID, ": b2", sep = ''))
  plot(nimbleOutput[, c("b3")], main = paste(siteID, ": b3", sep = ''))
  plot(nimbleOutput[, c("b4")], main = paste(siteID, ": b4", sep = ''))
  plot(nimbleOutput[, c("sd_add")], main = paste(siteID, ": sd_add", sep = ''))
  
  # Exclude burn-in:
  nimbleMinusBurn <- window(nimbleOutput, start = burnin)
  
  #Visualize chains w/o burn-in
  #plot(nimbleMinusBurn[, c("b1")], main = "SRER: b1 (burn-in removed)")
  #plot(nimbleMinusBurn[, c("sd_add")], main = "SRER: sd_add (burn-in removed)")
  plot(nimbleMinusBurn[, c("b1")], main = paste(siteID, ": b1 (burn-in removed)", sep = ''))
  plot(nimbleMinusBurn[, c("b2")], main = paste(siteID, ": b2 (burn-in removed)", sep = ''))
  plot(nimbleMinusBurn[, c("b3")], main = paste(siteID, ": b3 (burn-in removed)", sep = ''))
  plot(nimbleMinusBurn[, c("b4")], main = paste(siteID, ": b4 (burn-in removed)", sep = ''))
  plot(nimbleMinusBurn[, c("sd_add")],
       main = paste(siteID, "SRER: sd_add (burn-in removed)", sep = ''))
  
  #Needed below:
  #This is temporary.  We probably want to eliminate this...
  return(nimbleMinusBurn)
}

nimble_burn_TempDLM_BART = PlotChain(siteID = "BART", nimble_out_TempDLM_BART)

nimble_burn_TempDLM_KONZ = PlotChain(siteID = "KONZ", nimble_out_TempDLM_KONZ)

nimble_burn_TempDLM_OSBS = PlotChain(siteID = "OSBS", nimble_out_TempDLM_OSBS)

nimble_burn_TempDLM_SRER = PlotChain(siteID = "SRER", nimble_out_TempDLM_SRER)

Examine Brooks-Gelman-Rubin convergence diagnostic for temperature DLM.

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for BART.

# BART

gelman.diag(nimble_burn_TempDLM_BART[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.04       1.12
gelman.diag(nimble_burn_TempDLM_BART[, "b2"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1          1
gelman.diag(nimble_burn_TempDLM_BART[, "b3"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.03
gelman.diag(nimble_burn_TempDLM_BART[, "b4"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_BART[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1          1

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for KONZ.

# KONZ

gelman.diag(nimble_burn_TempDLM_KONZ[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.04       1.13
gelman.diag(nimble_burn_TempDLM_KONZ[, "b2"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_KONZ[, "b3"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.03
gelman.diag(nimble_burn_TempDLM_KONZ[, "b4"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.06        1.2
gelman.diag(nimble_burn_TempDLM_KONZ[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.03

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for OSBS.

# OSBS

gelman.diag(nimble_burn_TempDLM_OSBS[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.03       1.11
gelman.diag(nimble_burn_TempDLM_OSBS[, "b2"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.02       1.07
gelman.diag(nimble_burn_TempDLM_OSBS[, "b3"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01
gelman.diag(nimble_burn_TempDLM_OSBS[, "b4"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.02
gelman.diag(nimble_burn_TempDLM_OSBS[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.07       1.23

Since the upper confidence interval (C.I.) for the Brooks-Gelman-Rubin convergence diagnostic is (approximately) less than 1.1 for all parameters (depending on the individual run), we can conclude the chains have converged for SRER.

# SRER

gelman.diag(nimble_burn_TempDLM_SRER[, "b1"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1          1
gelman.diag(nimble_burn_TempDLM_SRER[, "b2"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1          1
gelman.diag(nimble_burn_TempDLM_SRER[, "b3"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.02
gelman.diag(nimble_burn_TempDLM_SRER[, "b4"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.04
gelman.diag(nimble_burn_TempDLM_SRER[, "sd_add"])
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.01

Convert temperature DLM output using tidybayes.

# BART

chain_TempDLM_BART <- nimble_burn_TempDLM_BART %>%
  spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)

# KONZ

chain_TempDLM_KONZ <- nimble_burn_TempDLM_KONZ %>%
  spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)

# OSBS

chain_TempDLM_OSBS <- nimble_burn_TempDLM_OSBS %>%
  spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)

# SRER

chain_TempDLM_SRER <- nimble_burn_TempDLM_SRER %>%
  spread_draws(y[day], x[day], b1, b2, b3, b4, sd_add)

Plot temperature DLM versus observations.

# plot_colors <- c("#F8766D", "#7CAE00", "#00BFC4", "#C77CFF")
# 
# # BART
# 
# plot_chain_TempDLM_BART <- chain_TempDLM_BART %>%
#   group_by(day) %>%
#   summarise(mean = mean(x),
#             lower = quantile(x, 0.025),
#             upper = quantile(x, 0.975),
#             .groups = "drop") %>%
#   mutate(time = target_data_BART_sub$time)
# 
# plot_TempDLM_BART <- ggplot(data = plot_chain_TempDLM_BART, aes(x = time, y = mean)) +
#   geom_line() +
#   geom_ribbon(aes(ymin = lower, ymax = upper),
#               alpha = 0.2,
#               color = plot_colors[1],
#               fill = plot_colors[1]) +
#   geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
#   labs(title = paste0("Site: BART | Time Period: ", substr(target_data_BART_sub$time[1], 1, 10), " to ", substr(target_data_BART_sub$time[subset_length_timesteps], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # KONZ
# 
# plot_chain_TempDLM_KONZ <- chain_TempDLM_KONZ %>%
#   group_by(day) %>%
#   summarise(mean = mean(x),
#             lower = quantile(x, 0.025),
#             upper = quantile(x, 0.975),
#             .groups = "drop") %>%
#   mutate(time = target_data_KONZ_sub$time)
# 
# plot_TempDLM_KONZ <- ggplot(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean)) +
#   geom_line() +
#   geom_ribbon(aes(ymin = lower, ymax = upper),
#               alpha = 0.2,
#               color = plot_colors[2],
#               fill = plot_colors[2]) +
#   geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
#   labs(title = paste0("Site: KONZ | Time Period: ", substr(target_data_KONZ_sub$time[1], 1, 10), " to ", substr(target_data_KONZ_sub$time[subset_length_timesteps], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # OSBS
# 
# plot_chain_TempDLM_OSBS <- chain_TempDLM_OSBS %>%
#   group_by(day) %>%
#   summarise(mean = mean(x),
#             lower = quantile(x, 0.025),
#             upper = quantile(x, 0.975),
#             .groups = "drop") %>%
#   mutate(time = target_data_OSBS_sub$time)
# 
# plot_TempDLM_OSBS <- ggplot(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean)) +
#   geom_line() +
#   geom_ribbon(aes(ymin = lower, ymax = upper),
#               alpha = 0.2,
#               color = plot_colors[3],
#               fill = plot_colors[3]) +
#   geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
#   labs(title = paste0("Site: OSBS | Time Period: ", substr(target_data_OSBS_sub$time[1], 1, 10), " to ", substr(target_data_OSBS_sub$time[subset_length_timesteps], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # SRER
# 
# plot_chain_TempDLM_SRER <- chain_TempDLM_SRER %>%
#   group_by(day) %>%
#   summarise(mean = mean(x),
#             lower = quantile(x, 0.025),
#             upper = quantile(x, 0.975),
#             .groups = "drop") %>%
#   mutate(time = target_data_SRER_sub$time)
# 
# plot_TempDLM_SRER <- ggplot(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean)) +
#   geom_line() +
#   geom_ribbon(aes(ymin = lower, ymax = upper),
#               alpha = 0.2,
#               color = plot_colors[4],
#               fill = plot_colors[4]) +
#   geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
#   labs(title = paste0("Site: SRER | Time Period: ", substr(target_data_SRER_sub$time[1], 1, 10), " to ", substr(target_data_SRER_sub$time[subset_length_timesteps], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# plot_TempDLM_BART
# plot_TempDLM_KONZ
# plot_TempDLM_OSBS
# plot_TempDLM_SRER

#Reorganized code:
#Untested because I can't connect to https://data.ecoforecast.org/ right now
plot_colors = c("BART" = "#F8766D", "KONZ" = "#7CAE00", "OSBS" = "#00BFC4", "SRER" = "#C77CFF")

#Perform statistical summaries on a chain:
#We do this outside PlotTemperatureDLMvsObs() because the result is used again below.
GetChainStats <- function(target_data_sub, chain_TempDLM)
{
  chainStatsSummary <- chain_TempDLM %>%
  group_by(day) %>%
  summarise(mean = mean(x),
            lower = quantile(x, 0.025),
            upper = quantile(x, 0.975),
            .groups = "drop") %>%
  mutate(time = target_data_sub$time)

  return(chainStatsSummary)
}

#The argument order would be better reversed.
#See PlotObsAndForecast() below, which is similar.
PlotTemperatureDLMvsObs <- function(siteID, plot_chain_TempDLM, target_data_sub)
{
  #plot_chain_TempDLM_SRER = plot_chain_TempDLM
  #target_data_SRER_sub -> target_data_sub

  plot = ggplot(plot_chain_TempDLM, aes(x = time, y = mean)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              alpha = 0.2,
              color = plot_colors[siteID],
              fill = plot_colors[siteID]) +
  geom_point(data = target_data_sub, aes(x = time, y = nee), color = "black") +
  labs(title = paste("Site:", siteID, "| Time Period: ", substr(target_data_sub$time[1], 1, 10),
                     "to", substr(target_data_sub$time[subset_length_timesteps], 1, 10)),
       x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
  theme_bw()

  print(plot)
}

plot_chain_TempDLM_BART = GetChainStats(target_data_BART_sub, chain_TempDLM_BART)
PlotTemperatureDLMvsObs(site = "BART", plot_chain_TempDLM_BART, target_data_BART_sub)

plot_chain_TempDLM_KONZ = GetChainStats(target_data_KONZ_sub, chain_TempDLM_KONZ)
PlotTemperatureDLMvsObs(site = "KONZ", plot_chain_TempDLM_KONZ, target_data_KONZ_sub)

plot_chain_TempDLM_OSBS = GetChainStats(target_data_OSBS_sub, chain_TempDLM_OSBS)
PlotTemperatureDLMvsObs(site = "OSBS", plot_chain_TempDLM_OSBS, target_data_OSBS_sub)

plot_chain_TempDLM_SRER = GetChainStats(target_data_SRER_sub, chain_TempDLM_SRER)
PlotTemperatureDLMvsObs(site = "SRER", plot_chain_TempDLM_SRER, target_data_SRER_sub)

Generate forecasts.

# Forecast preliminaries

ensemble_size <- 1000  # Specify ensemble size

forecast_length <- 35 * 24 * 2 + 1 # Number of time steps in forecast

NOAA_temp_forecast <- read.csv(text = getURL("https://raw.githubusercontent.com/eco4cast-class-VT/VT_NEET/master/temp_fc30_april.csv"))  # Read in NOAA GEFS forecasts

Generate forecast for BART.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "BART")  # Select NOAA forecasts for BART

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast incoming SW radiation matrix

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_BART
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_BART
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_BART,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_BART$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices]  # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices]  # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices]  # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) +  b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_BART_sub$nee_sd_intercept[1] + target_data_BART_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_BART)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("BART", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_BART <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_BART$time <- ymd_hms(NEE_forecast_BART$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for KONZ.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "KONZ")  # Select NOAA forecasts for KONZ

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast incoming SW radiation matrix

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_KONZ
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_KONZ
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_KONZ,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_KONZ$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices]  # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices]  # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices]  # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) +  b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_KONZ_sub$nee_sd_intercept[1] + target_data_KONZ_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_KONZ)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("KONZ", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_KONZ <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_KONZ$time <- ymd_hms(NEE_forecast_KONZ$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for OSBS.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "OSBS")  # Select NOAA forecasts for OSBS

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast incoming SW radiation matrix

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_OSBS
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_OSBS
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_OSBS,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_OSBS$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices]  # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices]  # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices]  # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) +  b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_OSBS_sub$nee_sd_intercept[1] + target_data_OSBS_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_OSBS)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("OSBS", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_OSBS <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_OSBS$time <- ymd_hms(NEE_forecast_OSBS$time, tz = "UTC")  # Specify time column as POSIXct

Generate forecast for SRER.

# Sample from temperature forecasts

NOAA_temp_forecast_site <- filter(NOAA_temp_forecast, siteID == "SRER")  # Select NOAA forecasts for SRER

temp_ensembles <- sample(unique(NOAA_temp_forecast_site$ensemble),
                         size = ensemble_size,
                         replace = TRUE)

forecast_inSW <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast incoming SW radiation matrix

forecast_temp <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize forecast temperature matrix

for(i in 1:ensemble_size){
  
  forecast_inSW[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$inSW/sd_inSW_data_SRER
  forecast_temp[, i] <- filter(NOAA_temp_forecast_site, ensemble == temp_ensembles[i])$tempC/sd_temp_data_SRER
  
}

# Sample from parameter posterior distribution

chain_TempDLM_last_time_step <- filter(chain_TempDLM_SRER,  # Create tibble for MCMC results for last time step
                                       day == max(chain_TempDLM_SRER$day))

posterior_sample_indices <- sample(x = 1:length(chain_TempDLM_last_time_step$x),
                                   size = ensemble_size,
                                   replace = TRUE)

NEE_ic <- chain_TempDLM_last_time_step$x[posterior_sample_indices]  # Extract last latent state value
b1 <- chain_TempDLM_last_time_step$b1[posterior_sample_indices]  # Extract sampled b1 values
b2 <- chain_TempDLM_last_time_step$b2[posterior_sample_indices]  # Extract sampled b2 values
b3 <- chain_TempDLM_last_time_step$b3[posterior_sample_indices]  # Extract sampled b3 values
b4 <- chain_TempDLM_last_time_step$b4[posterior_sample_indices]  # Extract sampled b4 values
sd_add <- chain_TempDLM_last_time_step$sd_add[posterior_sample_indices]  # Extract sampled sd_add values

# Specify initial conditions

NEE_state <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize state variable matrix
NEE_obs <- matrix(NA, nrow = forecast_length, ncol = ensemble_size)  # Initialize observed NEE matrix

NEE_state[1, ] <- NEE_ic  # Specify initial condition for state variable

# Generate forecast

for(m in 1:ensemble_size){
  
  for(t in 1:forecast_length){

    if(t > 1){  # Condition set b/c initial condition already specified for state variable
      
      # Note:
      # NEE_state = real NEE (state variable)
      # NEE_obs = observed NEE
      
      pred <- b1[m] * NEE_state[t - 1, m] + b2[m] * (forecast_inSW[t, m] - forecast_inSW[t - 1, m]) +  b3[m] * (forecast_temp[t, m] - forecast_temp[t - 1, m]) + b4[m]
      NEE_state[t, m] <- rnorm(1, mean = pred, sd = sd_add[m])
    
    }
    
    if(NEE_state[t, m] >= 0){  # Calculate sd_obs if NEE is positive
      
      sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeP[1] * NEE_state[t, m]  # Note: Since intercept and slope do not change, can just use first value of intercept and slope values
      
    }else{  # Calculate sd_obs if NEE is negative
      
      sd_obs <- target_data_SRER_sub$nee_sd_intercept[1] + target_data_SRER_sub$nee_sd_slopeN[1] * NEE_state[t, m]
          
    }
      
    NEE_obs[t, m] <- rnorm(1, mean = NEE_state[t, m], sd = sd_obs * sd_obs_mod_SRER)
    
  }
  
}

nee <- as.vector(t(NEE_obs))  # Convert matrix to vector for long format (transposed so that vector is organized so that each ensemble member is nested within each time step)

time <- rep(NA, length(nee))  # Initialize time vector

NOAA_forecast_times <- unique(NOAA_temp_forecast_site$time)  # Extract forecast times

# Assign forecast times

for(i in 1:forecast_length){
  
  time[((i*ensemble_size) - (ensemble_size - 1)):(i*ensemble_size)] <- NOAA_forecast_times[i]
  
}

siteID <- rep("SRER", length.out = length(nee))  # Create siteID vector

ensemble <- rep(seq(from = 1, to = ensemble_size, by = 1), times = forecast_length)  # Create ensemble vector

forecast <- rep(1, length.out = length(nee))  # Create forecast vector

data_assimilation <- rep(0, length.out = length(nee))  # Create forecast vector

le <- rep(NA, length.out = length(nee))  # Create le vector

vswc <- rep(NA, length.out = length(nee))  # Create vswc vector

NEE_forecast_SRER <- data.frame(time, siteID, ensemble, forecast, data_assimilation, nee, le, vswc)  # Create data frame with forecast data

NEE_forecast_SRER$time <- ymd_hms(NEE_forecast_SRER$time, tz = "UTC")  # Specify time column as POSIXct

Compile forecasts and write output to .csv file.

NEE_forecast <- rbind(NEE_forecast_BART,
                      NEE_forecast_KONZ,
                      NEE_forecast_OSBS,
                      NEE_forecast_SRER)

write.csv(NEE_forecast, file = "terrestrial_30min-2021-04-01-VT_NEET.csv", row.names = FALSE)

Plot forecasts (with fitted model and observations).

# BART

# plot_NEE_forecast_BART <- NEE_forecast_BART %>%
#   group_by(time) %>%
#   summarise(mean = mean(nee),
#             lower = quantile(nee, 0.025),
#             upper = quantile(nee, 0.975),
#             .groups = "drop")
# 
# plot_TempDLM_BART <- ggplot() +
#   geom_line(data = plot_chain_TempDLM_BART, aes(x = time, y = mean), color = "black") +
#   geom_line(data = plot_NEE_forecast_BART, aes(x = time, y = mean), color = "black") +
#   geom_ribbon(data = plot_chain_TempDLM_BART, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
#               alpha = 0.2) +
#   geom_ribbon(data = plot_NEE_forecast_BART, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
#               alpha = 0.2) +
#   geom_point(data = target_data_BART_sub, aes(x = time, y = nee), color = "black") +
#   scale_fill_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
#   scale_color_manual(name = "Period", values = c("gray", plot_colors[1]), labels = c("Historical", "Forecast")) +
#   labs(title = paste0("Site: BART | Time Period: ", substr(plot_chain_TempDLM_BART$time[1], 1, 10), " to ", substr(plot_NEE_forecast_BART$time[length(plot_NEE_forecast_BART$time)], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # KONZ
# 
# plot_NEE_forecast_KONZ <- NEE_forecast_KONZ %>%
#   group_by(time) %>%
#   summarise(mean = mean(nee),
#             lower = quantile(nee, 0.025),
#             upper = quantile(nee, 0.975),
#             .groups = "drop")
# 
# plot_TempDLM_KONZ <- ggplot() +
#   geom_line(data = plot_chain_TempDLM_KONZ, aes(x = time, y = mean), color = "black") +
#   geom_line(data = plot_NEE_forecast_KONZ, aes(x = time, y = mean), color = "black") +
#   geom_ribbon(data = plot_chain_TempDLM_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
#               alpha = 0.2) +
#   geom_ribbon(data = plot_NEE_forecast_KONZ, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
#               alpha = 0.2) +
#   geom_point(data = target_data_KONZ_sub, aes(x = time, y = nee), color = "black") +
#   scale_fill_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
#   scale_color_manual(name = "Period", values = c("gray", plot_colors[2]), labels = c("Historical", "Forecast")) +
#   labs(title = paste0("Site: KONZ | Time Period: ", substr(plot_chain_TempDLM_KONZ$time[1], 1, 10), " to ", substr(plot_NEE_forecast_KONZ$time[length(plot_NEE_forecast_KONZ$time)], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # OSBS
# 
# plot_NEE_forecast_OSBS <- NEE_forecast_OSBS %>%
#   group_by(time) %>%
#   summarise(mean = mean(nee),
#             lower = quantile(nee, 0.025),
#             upper = quantile(nee, 0.975),
#             .groups = "drop")
# 
# plot_TempDLM_OSBS <- ggplot() +
#   geom_line(data = plot_chain_TempDLM_OSBS, aes(x = time, y = mean), color = "black") +
#   geom_line(data = plot_NEE_forecast_OSBS, aes(x = time, y = mean), color = "black") +
#   geom_ribbon(data = plot_chain_TempDLM_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
#               alpha = 0.2) +
#   geom_ribbon(data = plot_NEE_forecast_OSBS, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
#               alpha = 0.2) +
#   geom_point(data = target_data_OSBS_sub, aes(x = time, y = nee), color = "black") +
#   scale_fill_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
#   scale_color_manual(name = "Period", values = c("gray", plot_colors[3]), labels = c("Historical", "Forecast")) +
#   labs(title = paste0("Site: OSBS | Time Period: ", substr(plot_chain_TempDLM_OSBS$time[1], 1, 10), " to ", substr(plot_NEE_forecast_OSBS$time[length(plot_NEE_forecast_OSBS$time)], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()
# 
# # SRER
# 
# plot_NEE_forecast_SRER <- NEE_forecast_SRER %>%
#   group_by(time) %>%
#   summarise(mean = mean(nee),
#             lower = quantile(nee, 0.025),
#             upper = quantile(nee, 0.975),
#             .groups = "drop")
# 
# plot_TempDLM_SRER <- ggplot() +
#   geom_line(data = plot_chain_TempDLM_SRER, aes(x = time, y = mean), color = "black") +
#   geom_line(data = plot_NEE_forecast_SRER, aes(x = time, y = mean), color = "black") +
#   geom_ribbon(data = plot_chain_TempDLM_SRER, aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"),
#               alpha = 0.2) +
#   geom_ribbon(data = plot_NEE_forecast_SRER, aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"),
#               alpha = 0.2) +
#   geom_point(data = target_data_SRER_sub, aes(x = time, y = nee), color = "black") +
#   scale_fill_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
#   scale_color_manual(name = "Period", values = c("gray", plot_colors[4]), labels = c("Historical", "Forecast")) +
#   labs(title = paste0("Site: SRER | Time Period: ", substr(plot_chain_TempDLM_SRER$time[1], 1, 10), " to ", substr(plot_NEE_forecast_SRER$time[length(plot_NEE_forecast_SRER$time)], 1, 10)),
#        x = "Time (UTC)",
#        y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
#   theme_bw()

#Plot observations and forecast for a given location:
PlotObsAndForecast <- function(siteID, nee_forecast, plot_chain_TempDLM, target_data_sub)
{
  #NEE_forecast_SRER = nee_forecast
  #plot_chain_TempDLM_SRER = plot_chain_TempDLM
  #plot_NEE_forecast_SRER = plot_NEE_forecast
  #target_data_SRER_sub = target_data_sub
  
  plot_NEE_forecast <- nee_forecast %>%
  group_by(time) %>%
  summarise(mean = mean(nee),
            lower = quantile(nee, 0.025),
            upper = quantile(nee, 0.975),
            .groups = "drop")
  
  title = paste0("Site: ", siteID, " | Time Period: ", substr(plot_chain_TempDLM$time[1], 1, 10), " to ",
                substr(plot_NEE_forecast$time[length(plot_NEE_forecast$time)], 1, 10))
  
  plot = ggplot() +
    geom_line(data = plot_chain_TempDLM, aes(x = time, y = mean), color = "black") +
    geom_line(data = plot_NEE_forecast, aes(x = time, y = mean), color = "black") +
    geom_ribbon(data = plot_chain_TempDLM,
                aes(x = time, ymin = lower, ymax = upper, color = "1", fill = "1"), alpha = 0.2) +
    geom_ribbon(data = plot_NEE_forecast,
                aes(x = time, ymin = lower, ymax = upper, color = "2", fill = "2"), alpha = 0.2) +
    geom_point(data = target_data_sub, aes(x = time, y = nee), color = "black") +
    scale_fill_manual(name = "Period", values = c("gray", plot_colors[siteID]),
                      labels = c("Historical", "Forecast")) +
    scale_color_manual(name = "Period", values = c("gray", plot_colors[siteID]),
                       labels = c("Historical", "Forecast")) +
    labs(title = title, x = "Time (UTC)",
       y = expression(paste("NEE (",mu, "mol" ~ CO[2] ~ m^-2 ~ s^-1,")"))) +
    theme_bw()
  
  print(plot)
}

#plot_TempDLM_BART
#plot_TempDLM_KONZ
#plot_TempDLM_OSBS
#plot_TempDLM_SRER

PlotObsAndForecast(site = "BART", NEE_forecast_BART, plot_chain_TempDLM_BART, target_data_BART_sub)

PlotObsAndForecast(site = "KONZ", NEE_forecast_KONZ, plot_chain_TempDLM_KONZ, target_data_KONZ_sub)

PlotObsAndForecast(site = "OSBS", NEE_forecast_OSBS, plot_chain_TempDLM_OSBS, target_data_OSBS_sub)

PlotObsAndForecast(site = "SRER", NEE_forecast_SRER, plot_chain_TempDLM_SRER, target_data_SRER_sub)